#!encoding:utf-8
from lxml import etree
import os
import urllib,urllib2
import sys
# Usage: <script> URL XPATH
#
# Fetch URL, parse the response as HTML and print the page followed by
# one line per XPath match.  Matched elements are printed as their text
# content (XPath ``string(.)``); text, attribute and scalar results are
# printed as-is.
import zlib

if len(sys.argv) < 3:
    sys.exit('usage: %s URL XPATH' % sys.argv[0])

url = sys.argv[1]
xpath_expr = sys.argv[2]

# No 'Referer' entry: a None header value is not a meaningful referer,
# so the header is simply omitted.
req_header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    'Accept': 'text/html;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'gzip',
    'Connection': 'close',
}
req_timeout = 5

req = urllib2.Request(url, None, req_header)
resp = urllib2.urlopen(req, None, req_timeout)
try:
    raw = resp.read()
    content_encoding = resp.info().get('Content-Encoding', '')
    # Charset announced in the Content-Type header, if any.
    charset = resp.info().getparam('charset')
finally:
    resp.close()

# We advertise gzip support above, so the body must be inflated when the
# server takes us up on it (16 + MAX_WBITS selects the gzip container).
if 'gzip' in content_encoding.lower():
    raw = zlib.decompress(raw, 16 + zlib.MAX_WBITS)

# lxml should be handed a unicode string: decode exactly once, using the
# server-declared charset and falling back to UTF-8.
page = unicode(raw, charset or 'utf-8')
print(page)

selector = etree.HTML(page)
selector = selector.xpath(xpath_expr)

# Scalar XPath results (count(), string(), boolean()) are not lists;
# wrap them so the printing loop below handles every result shape.
if not isinstance(selector, list):
    selector = [selector]

for node in selector:
    if hasattr(node, 'xpath'):
        # Element node: print its concatenated text content.
        print(node.xpath('string(.)'))
    else:
        # text() / attribute / scalar result: already a printable value.
        print(node)
    
